/*LIS Cross-National Data Center in Luxembourg*/

/*email: usersupport@lisdatacenter.org*/

/*LIS Self Teaching Package 2022*/
/*Part II: Gender, employment, and wages*/
/*SAS version*/

/*last change of this version of the syntax: 15-01-2022*/

/*The exercises in Part II emphasise the use of person-level data, including wages, 
demographics, and labour market information. Building on the techniques presented
in Part I, they introduce regression modelling and continue to lead you through 
the process of developing a comparative analysis of inequality and poverty across countries.*/


/*Exercise 1: Merging person and household data, selecting a sample*/

/* Session options for Exercise 1, followed by a blank title line. */
OPTIONS NONOTES NOSOURCE NOFMTERR
        NODATE NONUMBER NOCENTER LABEL
        LS=MAX PS=MAX ;
TITLE "" ;

/*---------------------------------------------------------------------
  Macro sample (Exercise 1)

  For every dataset name listed in the macro variable ALL:
    1. copies hid and own from the household file,
    2. copies the person-level variables from the person file,
    3. sorts both by hid and merges them,
    4. keeps persons aged 25 to 54 whose relation code is at most 2200,
    5. sets homeowner to 1 for own in 100-199 and to 0 for own in 200-299,
    6. prints the ppopwgt-weighted PROC MEANS output for homeowner.

  Inputs : ALL - blank-separated list of dataset names (for example us04).
           For each name xx, macro variables xxh and xxp must hold the
           household and person data set references.
  Outputs: data sets xxh, xxp and xx, plus printed output.

  The number of iterations is derived from ALL, so the list may contain
  any number of datasets.
---------------------------------------------------------------------*/
%MACRO sample ;
	%LOCAL i n ccyy ;

	/* Leave quietly when the list of datasets is empty. */
	%IF %LENGTH(&all) = 0 %THEN %DO ;
		%PUT WARNING: macro variable ALL is empty, nothing to process. ;
		%RETURN ;
	%END ;

	/* Number of blank-separated names in ALL. */
	%LET n = %SYSFUNC(COUNTW(&all, %STR( ))) ;

	%DO i = 1 %TO &n ;
		%LET ccyy = %SCAN(&all, &i, %STR( )) ;
		TITLE "Country   : &ccyy" ;

		/* Household file: hid and own only. */
		DATA &ccyy.h ;
		 SET &&&ccyy.h (KEEP=hid own) ;
		RUN ;
		PROC SORT DATA=&ccyy.h ;
		  BY hid ;
		RUN ;

		/* Person file. */
		DATA &ccyy.p ;
		 SET &&&ccyy.p (KEEP= hid dname pwgt ppopwgt relation partner  ageyoch age sex immigr educ 
							educ_c emp status1 ptime1 hwage1) ;
		RUN ;
		PROC SORT DATA=&ccyy.p ;
		  BY hid ;
		RUN ;

		/* Attach household variables to each person and select the sample. */
		DATA &ccyy ;
		 MERGE &ccyy.h &ccyy.p ;
		 BY hid ;
			IF ((25 <= age <= 54) AND (relation LE 2200)) ;
			IF (100 <= own <= 199) THEN homeowner = 1 ;
			IF (200 <= own <= 299) THEN homeowner = 0 ;
		RUN ;

		PROC MEANS DATA=&ccyy ;
		  VAR    homeowner ;
		  WEIGHT ppopwgt ;
		RUN ;
	%END ;
%MEND sample ;

/* Run Exercise 1 on the three listed datasets. */
%LET all=us04 be04 gr04;
%sample



/* Exercise 2: Stacking data, employment rates by gender */

OPTIONS NONOTES NOSOURCE NOFMTERR
        NODATE NONUMBER NOCENTER LABEL
        LS=MAX PS=MAX ;
TITLE "" ;

/*---------------------------------------------------------------------
  Macro stack (Exercise 2)

  For every dataset name in the macro variable ALL the household and
  person files are merged by hid and restricted to persons aged 25 to 54
  with relation <= 2200. The per-country data sets are stacked into the
  data set current (first country creates it, later ones are appended).

  After stacking, current is sorted by dname with WHERE sex = 2 and two
  ppopwgt-weighted cross-tabulations are printed:
    - dname by emp,
    - dname by ptime1 for rows with emp = 1 and sex = 2.

  Inputs : ALL - blank-separated list of dataset names; for each name xx
           the macro variables xxh and xxp hold the data set references.
  Outputs: data sets xxh, xxp, xx and current, plus printed output.

  The number of iterations is derived from ALL.
---------------------------------------------------------------------*/
%MACRO stack ;
	%LOCAL i n ccyy ;

	/* Leave quietly when the list of datasets is empty. */
	%IF %LENGTH(&all) = 0 %THEN %DO ;
		%PUT WARNING: macro variable ALL is empty, nothing to process. ;
		%RETURN ;
	%END ;

	/* Number of blank-separated names in ALL. */
	%LET n = %SYSFUNC(COUNTW(&all, %STR( ))) ;

	%DO i = 1 %TO &n ;
		%LET ccyy = %SCAN(&all, &i, %STR( )) ;

		/* Household file: hid and own only. */
		DATA &ccyy.h ;
		 SET &&&ccyy.h (KEEP=hid own) ;
		RUN ;
		PROC SORT DATA=&ccyy.h ;
		  BY hid ;
		RUN ;

		/* Person file. */
		DATA &ccyy.p ;
		 SET &&&ccyy.p (KEEP=hid dname pwgt ppopwgt relation partner ageyoch age sex immigr educ educ_c emp status1 ptime1 hwage1) ;
		RUN ;
		PROC SORT DATA=&ccyy.p ;
		  BY hid ;
		RUN ;

		/* Merge and select the sample. */
		DATA &ccyy ;
		 MERGE &ccyy.h &ccyy.p ;
		 BY hid ;
			IF ((25 <= age <= 54) AND (relation LE 2200)) ;
			IF (100 <= own <= 199) THEN homeowner = 1 ;
			IF (200 <= own <= 299) THEN homeowner = 0 ;
		RUN ;

		/* The first country creates the stacked file, the others extend it. */
		%IF &i = 1 %THEN
			%DO ;
				DATA current ;
				 SET &ccyy ;
				RUN ;
			%END ;
		%ELSE
			%DO ;
				PROC APPEND BASE=current DATA=&ccyy FORCE ;
				RUN ;
			%END ;
	%END ;

	/* NOTE(review): there is no OUT= here, so the WHERE clause appears to
	   subset current itself (rows with sex = 2 only) - confirm on the
	   target SAS release. The first PROC FREQ below has no WHERE of its own. */
	PROC SORT DATA=current ;
	  WHERE sex = 2 ;
	  BY dname ;
	RUN ;

	PROC FREQ DATA=current ;
	  TABLES dname*emp / NOFREQ NOCOL NOCUM ;
	  WEIGHT ppopwgt ;
	RUN ;

	PROC FREQ DATA=current ;
	  WHERE emp = 1 & sex = 2 ;
	  TABLES dname*ptime1 / NOFREQ NOCOL NOCUM ;
	  WEIGHT ppopwgt ;
	RUN ;
%MEND stack ;
/* Run Exercise 2 on the three listed datasets. */
%LET all=us04 be04 gr04;
%stack


/* Exercise 3: Family structure and employment */

OPTIONS NONOTES NOSOURCE NOFMTERR
        NODATE NONUMBER NOCENTER LABEL
        LS=MAX PS=MAX ;
TITLE "" ;

/*---------------------------------------------------------------------
  Macro family (Exercise 3)

  For every dataset name in the macro variable ALL the household and
  person files are merged by hid, restricted to persons aged 25 to 54
  with relation <= 2200, and achildcat is derived from ageyoch:
    1 when 0 <= ageyoch < 6, 2 when 6 <= ageyoch < 18, 0 otherwise
    (a missing ageyoch therefore also yields 0).
  The per-country data sets are stacked into the data set current, where
  achildcat carries the format chcat. (the format must exist before the
  macro is called).

  After stacking, current is sorted by dname with WHERE sex = 2 and the
  ppopwgt-weighted table dname*partner*achildcat*emp is printed.

  Inputs : ALL - blank-separated list of dataset names; for each name xx
           the macro variables xxh and xxp hold the data set references.
  Outputs: data sets xxh, xxp, xx and current, plus printed output.

  The number of iterations is derived from ALL.
---------------------------------------------------------------------*/
%MACRO family ;
	%LOCAL i n ccyy ;

	/* Leave quietly when the list of datasets is empty. */
	%IF %LENGTH(&all) = 0 %THEN %DO ;
		%PUT WARNING: macro variable ALL is empty, nothing to process. ;
		%RETURN ;
	%END ;

	/* Number of blank-separated names in ALL. */
	%LET n = %SYSFUNC(COUNTW(&all, %STR( ))) ;

	%DO i = 1 %TO &n ;
		%LET ccyy = %SCAN(&all, &i, %STR( )) ;

		/* Household file: hid and own only. */
		DATA &ccyy.h ;
		 SET &&&ccyy.h (KEEP=hid own) ;
		RUN ;
		PROC SORT DATA=&ccyy.h ;
		  BY hid ;
		RUN ;

		/* Person file. */
		DATA &ccyy.p ;
		 SET &&&ccyy.p (KEEP=hid dname pwgt ppopwgt relation partner ageyoch age sex immigr educ educ_c emp status1 ptime1 hwage1) ;
		RUN ;
		PROC SORT DATA=&ccyy.p ;
		  BY hid ;
		RUN ;

		/* Merge, select the sample and build the derived variables. */
		DATA &ccyy ;
		 MERGE &ccyy.h &ccyy.p ;
		 BY hid ;
			IF ((25 <= age <= 54) AND (relation LE 2200)) ;
			IF (100 <= own <= 199) THEN homeowner = 1 ;
			IF (200 <= own <= 299) THEN homeowner = 0 ;
			IF      (0 <= ageyoch < 6)  THEN achildcat = 1 ;
			ELSE IF (6 <= ageyoch < 18) THEN achildcat = 2 ;
			ELSE                             achildcat = 0 ;
		RUN ;

		/* The first country creates the stacked file, the others extend it. */
		%IF &i = 1 %THEN
			%DO ;
				DATA current ;
				 SET &ccyy ;
					ATTRIB achildcat   FORMAT=chcat. ;
				RUN ;
			%END ;
		%ELSE
			%DO ;
				PROC APPEND BASE=current DATA=&ccyy FORCE ;
				RUN ;
			%END ;
	%END ;

	/* NOTE(review): there is no OUT= here, so the WHERE clause appears to
	   subset current itself (rows with sex = 2 only) - confirm on the
	   target SAS release. */
	PROC SORT DATA=current ;
	  WHERE sex = 2 ;
	  BY dname ;
	RUN ;

	PROC FREQ DATA=current ;
	  TABLES dname*partner*achildcat*emp / NOFREQ NOCOL NOCUM ;
	  WEIGHT ppopwgt ;
	RUN ;

%MEND family ;

/* Value labels applied to achildcat in the family macro. */
PROC FORMAT ;
	VALUE chcat
		. = 'missing'
		0 = 'no children <18'
		1 = '<6 years'
		2 = '6-17 years' ;
RUN ;

/* Run Exercise 3 on the three listed datasets. */
%LET all=us04 be04 gr04;
%family


/* Exercise 4: Dependent employment and hourly wages */

OPTIONS NONOTES NOSOURCE NOFMTERR
        NODATE NONUMBER NOCENTER LABEL
        LS=MAX PS=MAX ;
TITLE "" ;
/*---------------------------------------------------------------------
  Macro depend (Exercise 4)

  For every dataset name in the macro variable ALL:
    1. household and person files are merged by hid and restricted to
       persons aged 25 to 54 with relation <= 2200;
    2. homeowner, achildcat and depemp are derived
       (depemp = 1 for status1 in 100-120, 0 for 200-240, else missing);
    3. the ppopwgt-weighted table dname*sex*depemp is printed for rows
       with emp = 1;
    4. hourwage is built from hwage1: missing values are dropped,
       negative values are set to 0, and the log is taken (a log that
       comes out missing is recoded to 0);
    5. hourwage is capped at exp(q75 + 3*iqr) and floored at
       exp(q25 - 3*iqr), where q25 and q75 are the ppopwgt-weighted
       quartiles of the log wage and iqr = q75 - q25;
    6. the result is stacked into the data set current.
  Finally the ppopwgt-weighted median of hourwage by dname and sex is
  printed from current.

  Inputs : ALL - blank-separated list of dataset names; for each name xx
           the macro variables xxh and xxp hold the data set references.
           The format chcat. must exist before the macro is called.
  Outputs: data sets xxh, xxp, xx, temp and current, plus printed output.

  The number of iterations is derived from ALL.
---------------------------------------------------------------------*/
%MACRO depend ;
	%LOCAL i n ccyy b t ;

	/* Leave quietly when the list of datasets is empty. */
	%IF %LENGTH(&all) = 0 %THEN %DO ;
		%PUT WARNING: macro variable ALL is empty, nothing to process. ;
		%RETURN ;
	%END ;

	/* Number of blank-separated names in ALL. */
	%LET n = %SYSFUNC(COUNTW(&all, %STR( ))) ;

	%DO i = 1 %TO &n ;
		%LET ccyy = %SCAN(&all, &i, %STR( )) ;

		/* Household file. */
		DATA &ccyy.h ;
		 SET &&&ccyy.h (KEEP=hid did own) ;
		RUN ;
		PROC SORT DATA=&ccyy.h ;
		  BY hid ;
		RUN ;

		/* Person file. */
		DATA &ccyy.p ;
		 SET &&&ccyy.p (KEEP=hid did dname pwgt ppopwgt relation partner ageyoch age sex immigr educ educ_c emp status1 ptime1 hwage1) ;
		RUN ;
		PROC SORT DATA=&ccyy.p ;
		  BY hid ;
		RUN ;

		/* Merge, select the sample and build the derived variables. */
		DATA &ccyy ;
		 MERGE &ccyy.h &ccyy.p ;
		 BY hid ;
			IF ((25 <= age <= 54) AND (relation LE 2200)) ;
			IF (100 <= own <= 199) THEN homeowner = 1 ;
			IF (200 <= own <= 299) THEN homeowner = 0 ;
			IF      (0 <= ageyoch < 6)  THEN achildcat = 1 ;
			ELSE IF (6 <= ageyoch < 18) THEN achildcat = 2 ;
			ELSE                             achildcat = 0 ;
			IF      (100 <= status1 <= 120) THEN depemp = 1 ;
			ELSE IF (200 <= status1 <= 240) THEN depemp = 0 ;
			ELSE                                 depemp = . ;
		RUN ;
		PROC SORT DATA=&ccyy ;
		  BY dname sex ;
		RUN ;

		/* depemp among rows with emp = 1, by sex. */
		PROC FREQ DATA=&ccyy ;
		  WHERE emp = 1 ;
		  TABLES dname*sex*depemp / NOFREQ NOCOL NOCUM ;
		  WEIGHT ppopwgt ;
		RUN ;

		/* Hourly wage and its log; rows without a wage are dropped. */
		DATA &ccyy ;
		 SET &ccyy ;
			hourwage = hwage1 ;
			IF hourwage=. THEN DELETE ;
			IF (hourwage < 0 ) THEN hourwage = 0 ;
			hourwagelog=log(hourwage) ;
			IF( (hourwagelog=.)  AND (hourwage^=.) ) THEN hourwagelog=0 ;
		RUN ;
		PROC SORT DATA=&ccyy ;
		  BY did hourwagelog ;
		RUN ;

		/* Weighted quartiles of the log wage, passed on as macro variables. */
		PROC UNIVARIATE DATA=&ccyy NOPRINT ;
		  VAR hourwagelog ;
		  WEIGHT ppopwgt ;
		  OUTPUT OUT= temp P25=q25   P75=q75 ;
		RUN ;
		DATA _NULL_ ;
		 SET temp ;
			CALL SYMPUT("b",q25) ;
			CALL SYMPUT("t",q75) ;
		RUN ;

		/* Top- and bottom-code the wage at three interquartile ranges. */
		DATA &ccyy ;
		 SET &ccyy ;
			iqr=&t-&b ;
			upper_bound=&t + (iqr * 3) ;
			lower_bound=&b - (iqr * 3) ;
			IF hourwage>exp(upper_bound) THEN hourwage=exp(upper_bound) ;
			IF hourwage<exp(lower_bound) THEN hourwage=exp(lower_bound) ;
		RUN ;

		/* The first country creates the stacked file, the others extend it. */
		%IF &i = 1 %THEN
			%DO ;
				DATA current ;
				 SET &ccyy ;
					ATTRIB achildcat   FORMAT=chcat. ;
				RUN ;
			%END ;
		%ELSE
			%DO ;
				PROC APPEND BASE=current DATA=&ccyy FORCE ;
				RUN ;
			%END ;
	%END ;

	PROC MEANS DATA=current MEDIAN ;
	  CLASS  dname sex ;
	  TYPES  dname*sex ;
	  VAR    hourwage ;
	  WEIGHT ppopwgt ;
	RUN ;

%MEND depend ;


/* Value labels applied to achildcat in the depend macro. */
PROC FORMAT ;
	VALUE chcat
		. = 'missing'
		0 = 'no children <18'
		1 = '<6 years'
		2 = '6-17 years' ;
RUN ;

/* Run Exercise 4 on the three listed datasets. */
%LET all=us04 be04 gr04;
%depend

/* Exercise 5: Hourly wages, education, and country-specific variables */

OPTIONS NONOTES NOSOURCE NOFMTERR
        NODATE NONUMBER NOCENTER LABEL
        LS=MAX PS=MAX ;
TITLE "" ;
/*---------------------------------------------------------------------
  Macro educ (Exercise 5)

  For every dataset name in the macro variable ALL:
    1. household and person files are merged by hid and restricted to
       persons aged 25 to 54 with relation <= 2200;
    2. homeowner, achildcat and depemp are derived;
    3. the ppopwgt-weighted table dname*educ*educ_c (missing values
       included) is printed for that country;
    4. hourwage is built from hwage1 (missing dropped, negatives set
       to 0) and top- and bottom-coded at exp(q75 + 3*iqr) and
       exp(q25 - 3*iqr), using ppopwgt-weighted quartiles of the log wage;
    5. the result is stacked into the data set current.
  Finally the ppopwgt-weighted median of hourwage by dname, sex and educ
  is printed from current.

  Inputs : ALL - blank-separated list of dataset names; for each name xx
           the macro variables xxh and xxp hold the data set references.
           The format chcat. must exist before the macro is called.
  Outputs: data sets xxh, xxp, xx, temp and current, plus printed output.

  The number of iterations is derived from ALL.
---------------------------------------------------------------------*/
%MACRO educ ;
	%LOCAL i n ccyy b t ;

	/* Leave quietly when the list of datasets is empty. */
	%IF %LENGTH(&all) = 0 %THEN %DO ;
		%PUT WARNING: macro variable ALL is empty, nothing to process. ;
		%RETURN ;
	%END ;

	/* Number of blank-separated names in ALL. */
	%LET n = %SYSFUNC(COUNTW(&all, %STR( ))) ;

	%DO i = 1 %TO &n ;
		%LET ccyy = %SCAN(&all, &i, %STR( )) ;

		/* Household file. */
		DATA &ccyy.h ;
		 SET &&&ccyy.h (KEEP=hid did own) ;
		RUN ;
		PROC SORT DATA=&ccyy.h ;
		  BY hid ;
		RUN ;

		/* Person file. */
		DATA &ccyy.p ;
		 SET &&&ccyy.p (KEEP=hid did dname pwgt ppopwgt relation partner ageyoch age sex immigr educ educ_c emp status1 ptime1 hwage1) ;
		RUN ;
		PROC SORT DATA=&ccyy.p ;
		  BY hid ;
		RUN ;

		/* Merge, select the sample and build the derived variables. */
		DATA &ccyy ;
		 MERGE &ccyy.h &ccyy.p ;
		 BY hid ;
			IF ((25 <= age <= 54) AND (relation LE 2200)) ;
			IF (100 <= own <= 199) THEN homeowner = 1 ;
			IF (200 <= own <= 299) THEN homeowner = 0 ;
			IF      (0 <= ageyoch < 6)  THEN achildcat = 1 ;
			ELSE IF (6 <= ageyoch < 18) THEN achildcat = 2 ;
			ELSE                             achildcat = 0 ;
			IF      (100 <= status1 <= 120) THEN depemp = 1 ;
			ELSE IF (200 <= status1 <= 240) THEN depemp = 0 ;
			ELSE                                 depemp = . ;
		RUN ;
		PROC SORT DATA=&ccyy ;
		  BY dname sex ;
		RUN ;

		/* Tabulated on the country data set: the stacked data set current
		   is not complete (and on the first pass not yet built by this
		   macro) while the loop is still running. */
		PROC FREQ DATA=&ccyy ;
		  TABLES dname*educ*educ_c / NOFREQ NOCOL NOCUM MISSING ;
		  WEIGHT ppopwgt ;
		RUN ;

		/* Hourly wage and its log; rows without a wage are dropped. */
		DATA &ccyy ;
		 SET &ccyy ;
			hourwage = hwage1 ;
			IF hourwage=. THEN DELETE ;
			IF (hourwage < 0 ) THEN hourwage = 0 ;
			hourwagelog=log(hourwage) ;
			IF( (hourwagelog=.)  AND (hourwage^=.) ) THEN hourwagelog=0 ;
		RUN ;
		PROC SORT DATA=&ccyy ;
		  BY did hourwagelog ;
		RUN ;

		/* Weighted quartiles of the log wage, passed on as macro variables. */
		PROC UNIVARIATE DATA=&ccyy NOPRINT ;
		  VAR hourwagelog ;
		  WEIGHT ppopwgt ;
		  OUTPUT OUT= temp P25=q25   P75=q75 ;
		RUN ;
		DATA _NULL_ ;
		 SET temp ;
			CALL SYMPUT("b",q25) ;
			CALL SYMPUT("t",q75) ;
		RUN ;

		/* Top- and bottom-code the wage at three interquartile ranges. */
		DATA &ccyy ;
		 SET &ccyy ;
			iqr=&t-&b ;
			upper_bound=&t + (iqr * 3) ;
			lower_bound=&b - (iqr * 3) ;
			IF hourwage>exp(upper_bound) THEN hourwage=exp(upper_bound) ;
			IF hourwage<exp(lower_bound) THEN hourwage=exp(lower_bound) ;
		RUN ;

		/* The first country creates the stacked file, the others extend it. */
		%IF &i = 1 %THEN
			%DO ;
				DATA current ;
				 SET &ccyy ;
					ATTRIB achildcat   FORMAT=chcat. ;
				RUN ;
			%END ;
		%ELSE
			%DO ;
				PROC APPEND BASE=current DATA=&ccyy FORCE ;
				RUN ;
			%END ;
	%END ;

	PROC MEANS DATA=current MEDIAN ;
	  CLASS  dname sex educ ;
	  TYPES  dname*sex*educ ;
	  VAR    hourwage ;
	  WEIGHT ppopwgt ;
	RUN ;

%MEND educ ;


/* Value labels applied to achildcat in the educ macro. */
PROC FORMAT ;
	VALUE chcat
		. = 'missing'
		0 = 'no children <18'
		1 = '<6 years'
		2 = '6-17 years' ;
RUN ;

/* Run Exercise 5 on the three listed datasets. */
%LET all=us04 be04 gr04;
%educ


/* Exercise 6: Immigration and wages, understanding harmonisation */

OPTIONS NONOTES NOSOURCE NOFMTERR
        NODATE NONUMBER NOCENTER LABEL
        LS=MAX PS=MAX ;
TITLE "" ;

/*---------------------------------------------------------------------
  Macro immig (Exercise 6)

  For every dataset name in the macro variable ALL:
    1. household and person files are merged by hid and restricted to
       persons aged 25 to 54 with relation <= 2200;
    2. homeowner, achildcat and depemp are derived;
    3. the ppopwgt-weighted table dname*educ*educ_c (missing values
       included) is printed for that country;
    4. hourwage is built from hwage1 (missing dropped, negatives set
       to 0) and top- and bottom-coded at exp(q75 + 3*iqr) and
       exp(q25 - 3*iqr), using ppopwgt-weighted quartiles of the log wage;
    5. the result is stacked into the data set current.
  Finally current is sorted by dname and sex and the ppopwgt-weighted
  median of hourwage by dname, sex and immigr is printed.

  Inputs : ALL - blank-separated list of dataset names; for each name xx
           the macro variables xxh and xxp hold the data set references.
           The format chcat. must exist before the macro is called.
  Outputs: data sets xxh, xxp, xx, temp and current, plus printed output.

  The number of iterations is derived from ALL.
---------------------------------------------------------------------*/
%MACRO immig ;
	%LOCAL i n ccyy b t ;

	/* Leave quietly when the list of datasets is empty. */
	%IF %LENGTH(&all) = 0 %THEN %DO ;
		%PUT WARNING: macro variable ALL is empty, nothing to process. ;
		%RETURN ;
	%END ;

	/* Number of blank-separated names in ALL. */
	%LET n = %SYSFUNC(COUNTW(&all, %STR( ))) ;

	%DO i = 1 %TO &n ;
		%LET ccyy = %SCAN(&all, &i, %STR( )) ;

		/* Household file. */
		DATA &ccyy.h ;
		 SET &&&ccyy.h (KEEP=hid did own) ;
		RUN ;
		PROC SORT DATA=&ccyy.h ;
		  BY hid ;
		RUN ;

		/* Person file. */
		DATA &ccyy.p ;
		 SET &&&ccyy.p (KEEP=hid did dname pwgt ppopwgt relation partner ageyoch age sex immigr educ educ_c emp status1 ptime1 hwage1) ;
		RUN ;
		PROC SORT DATA=&ccyy.p ;
		  BY hid ;
		RUN ;

		/* Merge, select the sample and build the derived variables. */
		DATA &ccyy ;
		 MERGE &ccyy.h &ccyy.p ;
		 BY hid ;
			IF ((25 <= age <= 54) AND (relation LE 2200)) ;
			IF (100 <= own <= 199) THEN homeowner = 1 ;
			IF (200 <= own <= 299) THEN homeowner = 0 ;
			IF      (0 <= ageyoch < 6)  THEN achildcat = 1 ;
			ELSE IF (6 <= ageyoch < 18) THEN achildcat = 2 ;
			ELSE                             achildcat = 0 ;
			IF      (100 <= status1 <= 120) THEN depemp = 1 ;
			ELSE IF (200 <= status1 <= 240) THEN depemp = 0 ;
			ELSE                                 depemp = . ;
		RUN ;
		PROC SORT DATA=&ccyy ;
		  BY dname sex ;
		RUN ;

		/* Tabulated on the country data set: the stacked data set current
		   is not complete (and on the first pass not yet built by this
		   macro) while the loop is still running.
		   NOTE(review): this exercise is about immigration, yet the table
		   crosses educ with educ_c - confirm whether immigr was intended. */
		PROC FREQ DATA=&ccyy ;
		  TABLES dname*educ*educ_c / NOFREQ NOCOL NOCUM MISSING ;
		  WEIGHT ppopwgt ;
		RUN ;

		/* Hourly wage and its log; rows without a wage are dropped. */
		DATA &ccyy ;
		 SET &ccyy ;
			hourwage = hwage1 ;
			IF hourwage=. THEN DELETE ;
			IF (hourwage < 0 ) THEN hourwage = 0 ;
			hourwagelog=log(hourwage) ;
			IF( (hourwagelog=.)  AND (hourwage^=.) ) THEN hourwagelog=0 ;
		RUN ;
		PROC SORT DATA=&ccyy ;
		  BY did hourwagelog ;
		RUN ;

		/* Weighted quartiles of the log wage, passed on as macro variables. */
		PROC UNIVARIATE DATA=&ccyy NOPRINT ;
		  VAR hourwagelog ;
		  WEIGHT ppopwgt ;
		  OUTPUT OUT= temp P25=q25   P75=q75 ;
		RUN ;
		DATA _NULL_ ;
		 SET temp ;
			CALL SYMPUT("b",q25) ;
			CALL SYMPUT("t",q75) ;
		RUN ;

		/* Top- and bottom-code the wage at three interquartile ranges. */
		DATA &ccyy ;
		 SET &ccyy ;
			iqr=&t-&b ;
			upper_bound=&t + (iqr * 3) ;
			lower_bound=&b - (iqr * 3) ;
			IF hourwage>exp(upper_bound) THEN hourwage=exp(upper_bound) ;
			IF hourwage<exp(lower_bound) THEN hourwage=exp(lower_bound) ;
		RUN ;

		/* The first country creates the stacked file, the others extend it. */
		%IF &i = 1 %THEN
			%DO ;
				DATA current ;
				 SET &ccyy ;
					ATTRIB achildcat   FORMAT=chcat. ;
				RUN ;
			%END ;
		%ELSE
			%DO ;
				PROC APPEND BASE=current DATA=&ccyy FORCE ;
				RUN ;
			%END ;
	%END ;

	PROC SORT DATA=current ;
	  BY dname sex ;
	RUN ;

	PROC MEANS DATA=current MEDIAN ;
	  CLASS  dname sex immigr ;
	  TYPES  dname*sex*immigr ;
	  VAR    hourwage ;
	  WEIGHT ppopwgt ;
	RUN ;

%MEND immig ;


/* Value labels applied to achildcat in the immig macro. */
PROC FORMAT ;
	VALUE chcat
		. = 'missing'
		0 = 'no children <18'
		1 = '<6 years'
		2 = '6-17 years' ;
RUN ;

/* Run Exercise 6 on the three listed datasets. */
%LET all=us04 be04 gr04;
%immig



/* Exercise 7: Wage regressions */
OPTIONS NONOTES NOSOURCE NOFMTERR
        NODATE NONUMBER NOCENTER LABEL
        LS=MAX PS=MAX
        NOMPRINT NOMLOGIC NOSYMBOLGEN ;
TITLE "" ;
/*---------------------------------------------------------------------
  Macro country (Exercise 7)

  For every dataset name in the macro variable ALL:
    1. household and person files are merged by hid and restricted to
       persons aged 25 to 54 with relation <= 2200;
    2. regressors are derived: homeowner, achildcat, youngchild,
       oldchild, depemp, agesq, mededuc and hieduc (the format of
       educ_c is removed);
    3. hourwage is built from hwage1 (missing dropped, negatives set
       to 0) and top- and bottom-coded at exp(q75 + 3*iqr) and
       exp(q25 - 3*iqr), using ppopwgt-weighted quartiles of the log
       wage; logwage is the log of the coded wage;
    4. the result is stacked into the data set current.
  Finally current is sorted by dname and sex and PROC SURVEYREG fits
  logwage on the regressors, weighted by ppopwgt, separately for each
  dname and sex.

  Inputs : ALL - blank-separated list of dataset names; for each name xx
           the macro variables xxh and xxp hold the data set references.
           The format chcat. must exist before the macro is called.
  Outputs: data sets xxh, xxp, xx, temp and current, plus printed output.

  The number of iterations is derived from ALL.
---------------------------------------------------------------------*/
%MACRO country ;
	%LOCAL i n ccyy b t ;

	/* Leave quietly when the list of datasets is empty. */
	%IF %LENGTH(&all) = 0 %THEN %DO ;
		%PUT WARNING: macro variable ALL is empty, nothing to process. ;
		%RETURN ;
	%END ;

	/* Number of blank-separated names in ALL. */
	%LET n = %SYSFUNC(COUNTW(&all, %STR( ))) ;

	%DO i = 1 %TO &n ;
		%LET ccyy = %SCAN(&all, &i, %STR( )) ;

		/* Household file: hid and own only. */
		DATA &ccyy.h ;
		 SET &&&ccyy.h (KEEP=hid own) ;
		RUN ;
		PROC SORT DATA=&ccyy.h ;
		  BY hid ;
		RUN ;

		/* Person file. */
		DATA &ccyy.p ;
		 SET &&&ccyy.p (KEEP=hid did dname pwgt ppopwgt relation partner ageyoch age sex immigr educ educ_c emp status1 ptime1 hwage1) ;
		RUN ;
		PROC SORT DATA=&ccyy.p ;
		  BY hid ;
		RUN ;

		/* Merge, select the sample and build the regressors. */
		DATA &ccyy ;
		 MERGE &ccyy.h &ccyy.p ;
		 BY hid ;
			IF ((25 <= age <= 54) AND (relation LE 2200)) ;
			IF (100 <= own <= 199) THEN homeowner = 1 ;
			IF (200 <= own <= 299) THEN homeowner = 0 ;
			IF      (0 <= ageyoch < 6)  THEN achildcat = 1 ;
			ELSE IF (6 <= ageyoch < 18) THEN achildcat = 2 ;
			ELSE                             achildcat = 0 ;
			/* Dummies for the two child categories. */
			IF      (achildcat = 1)      THEN youngchild = 1 ;
			ELSE IF (achildcat in (0,2)) THEN youngchild = 0 ;
			ELSE                              youngchild = . ;
			IF      (achildcat = 2)      THEN oldchild   = 1 ;
			ELSE IF (achildcat in (0,1)) THEN oldchild   = 0 ;
			ELSE                              oldchild   = . ;
			IF      (100 <= status1 <= 120) THEN depemp = 1 ;
			ELSE IF (200 <= status1 <= 240) THEN depemp = 0 ;
			ELSE                                 depemp = . ;
			agesq = age*age ;
			FORMAT educ_c ;
			/* Education dummies; educ = 1 is the omitted category. */
			IF      (educ in (1,3)) THEN mededuc = 0 ;
			ELSE IF (educ = 2)      THEN mededuc = 1 ;
			ELSE                         mededuc = . ;
			IF      (educ in (1,2)) THEN hieduc  = 0 ;
			ELSE IF (educ = 3)      THEN hieduc  = 1 ;
			ELSE                         hieduc  = . ;
		RUN ;

		/* Hourly wage and its log; rows without a wage are dropped. */
		DATA &ccyy ;
		 SET &ccyy ;
			hourwage = hwage1 ;
			IF hourwage=. THEN DELETE ;
			IF (hourwage < 0 ) THEN hourwage = 0 ;
			hourwagelog=log(hourwage) ;
			IF( (hourwagelog=.)  AND (hourwage^=.) ) THEN hourwagelog=0 ;
		RUN ;
		PROC SORT DATA=&ccyy ;
		  BY did hourwagelog ;
		RUN ;

		/* Weighted quartiles of the log wage, passed on as macro variables. */
		PROC UNIVARIATE DATA=&ccyy NOPRINT ;
		  VAR hourwagelog ;
		  WEIGHT ppopwgt ;
		  OUTPUT OUT= temp P25=q25   P75=q75 ;
		RUN ;
		DATA _NULL_ ;
		 SET temp ;
			CALL SYMPUT("b",q25) ;
			CALL SYMPUT("t",q75) ;
		RUN ;

		/* Top- and bottom-code the wage, then take the log used as the
		   dependent variable. */
		DATA &ccyy ;
		 SET &ccyy ;
			iqr=&t-&b ;
			upper_bound=&t + (iqr * 3) ;
			lower_bound=&b - (iqr * 3) ;
			IF hourwage>exp(upper_bound) THEN hourwage=exp(upper_bound) ;
			IF hourwage<exp(lower_bound) THEN hourwage=exp(lower_bound) ;
			logwage = LOG(hourwage) ;
		RUN ;

		/* The first country creates the stacked file, the others extend it. */
		%IF &i = 1 %THEN
			%DO ;
				DATA current ;
				 SET &ccyy ;
					ATTRIB achildcat   FORMAT=chcat. ;
				RUN ;
			%END ;
		%ELSE
			%DO ;
				PROC APPEND BASE=current DATA=&ccyy FORCE ;
				RUN ;
			%END ;
	%END ;

	PROC SORT DATA=current ;
	  BY dname sex ;
	RUN ;

	/* DATA= is named explicitly instead of relying on the most recently
	   created data set. */
	PROC SURVEYREG DATA=current ;
	  BY dname sex ;
	  WEIGHT ppopwgt ;
	  MODEL  logwage = age agesq mededuc hieduc immigr partner youngchild oldchild
	                   ptime1 homeowner ;
	RUN ;

%MEND country ;

/* Value labels applied to achildcat in the country macro. */
PROC FORMAT ;
	VALUE chcat
		. = 'missing'
		0 = 'no children <18'
		1 = '<6 years'
		2 = '6-17 years' ;
RUN ;

/* Run Exercise 7 on the three listed datasets. */
%LET all=us04 be04 gr04;
%country
 
 
/* Exercise 8: Pooled regressions and normalised weights */

OPTIONS NONOTES NOSOURCE NOFMTERR
        NODATE NONUMBER NOCENTER LABEL
        LS=MAX PS=MAX
        NOMPRINT NOMLOGIC NOSYMBOLGEN ;
TITLE "" ;

/*---------------------------------------------------------------------
  Macro pool (Exercise 8)

  For every dataset name in the macro variable ALL:
    1. household and person files are merged by hid and restricted to
       persons aged 25 to 54 with relation <= 2200;
    2. regressors are derived: homeowner, achildcat, youngchild,
       oldchild, depemp, agesq, mededuc, hieduc and the country dummies
       belgium (dname = 'be04') and greece (dname = 'gr04');
    3. hourwage is built from hwage1 (missing dropped, negatives set
       to 0) and top- and bottom-coded at exp(q75 + 3*iqr) and
       exp(q25 - 3*iqr), using ppopwgt-weighted quartiles of the log wage;
    4. hourwage is divided by ppp (0.86 for be04, 0.65 for gr04, 1
       otherwise) and logged into logwage_ppp;
    5. the result is stacked into the data set current.
  Finally current is sorted by sex and PROC SURVEYREG fits logwage_ppp
  on the regressors and country dummies, weighted by ppopwgt,
  separately for each sex.

  Inputs : ALL - blank-separated list of dataset names; for each name xx
           the macro variables xxh and xxp hold the data set references.
  Outputs: data sets xxh, xxp, xx, temp and current, plus printed output.

  The number of iterations is derived from ALL. The country dummies and
  the ppp values remain specific to be04 and gr04.
---------------------------------------------------------------------*/
%MACRO pool ;
  %LOCAL i n ccyy b t ;

  /* Leave quietly when the list of datasets is empty. */
  %IF %LENGTH(&all) = 0 %THEN %DO ;
    %PUT WARNING: macro variable ALL is empty, nothing to process. ;
    %RETURN ;
  %END ;

  /* Number of blank-separated names in ALL. */
  %LET n = %SYSFUNC(COUNTW(&all, %STR( ))) ;

  %DO i = 1 %TO &n ;
    %LET ccyy = %SCAN(&all, &i, %STR( )) ;

    /* Household file: hid and own only. */
    DATA &ccyy.h ;
     SET &&&ccyy.h (KEEP=hid own) ;
    RUN ;
    PROC SORT DATA=&ccyy.h ;
      BY hid ;
    RUN ;

    /* Person file. */
    DATA &ccyy.p ;
     SET &&&ccyy.p (KEEP=hid did dname pwgt ppopwgt relation partner ageyoch age sex immigr
                         educ educ_c emp status1 ptime1 hwage1) ;
    RUN ;
    PROC SORT DATA=&ccyy.p ;
      BY hid ;
    RUN ;

    /* Merge, select the sample and build the regressors. */
    DATA &ccyy ;
     MERGE &ccyy.h &ccyy.p ;
     BY hid ;
        IF ((25 <= age <= 54) AND (relation LE 2200)) ;
        IF (100 <= own <= 199) THEN homeowner = 1 ;
        IF (200 <= own <= 299) THEN homeowner = 0 ;
        IF      (0 <= ageyoch < 6)  THEN achildcat = 1 ;
        ELSE IF (6 <= ageyoch < 18) THEN achildcat = 2 ;
        ELSE                             achildcat = 0 ;
        /* Dummies for the two child categories. */
        IF      (achildcat = 1)      THEN youngchild = 1 ;
        ELSE IF (achildcat in (0,2)) THEN youngchild = 0 ;
        ELSE                              youngchild = . ;
        IF      (achildcat = 2)      THEN oldchild   = 1 ;
        ELSE IF (achildcat in (0,1)) THEN oldchild   = 0 ;
        ELSE                              oldchild   = . ;
        IF      (100 <= status1 <= 120) THEN depemp = 1 ;
        ELSE IF (200 <= status1 <= 240) THEN depemp = 0 ;
        ELSE                                 depemp = . ;
        agesq = age*age ;
        /* Education dummies; educ = 1 is the omitted category. */
        IF      (educ in (1,3)) THEN mededuc = 0 ;
        ELSE IF (educ = 2)      THEN mededuc = 1 ;
        ELSE                         mededuc = . ;
        IF      (educ in (1,2)) THEN hieduc  = 0 ;
        ELSE IF (educ = 3)      THEN hieduc  = 1 ;
        ELSE                         hieduc  = . ;
        /* Country dummies; every other dname is the reference. */
        belgium = 0 ;
        IF dname = 'be04' THEN belgium = 1 ;
        greece  = 0 ;
        IF dname = 'gr04' THEN greece  = 1 ;
    RUN ;

    /* Hourly wage and its log; rows without a wage are dropped. */
    DATA &ccyy ;
     SET &ccyy ;
        hourwage = hwage1 ;
        IF hourwage=. THEN DELETE ;
        IF (hourwage < 0 ) THEN hourwage = 0 ;
        hourwagelog=log(hourwage) ;
        IF( (hourwagelog=.)  AND (hourwage^=.) ) THEN hourwagelog=0 ;
    RUN ;
    PROC SORT DATA=&ccyy ;
      BY did hourwagelog ;
    RUN ;

    /* Weighted quartiles of the log wage, passed on as macro variables. */
    PROC UNIVARIATE DATA=&ccyy NOPRINT ;
      VAR hourwagelog ;
      WEIGHT ppopwgt ;
      OUTPUT OUT= temp P25=q25   P75=q75 ;
    RUN ;
    DATA _NULL_ ;
     SET temp ;
        CALL SYMPUT("b",q25) ;
        CALL SYMPUT("t",q75) ;
    RUN ;

    /* Top- and bottom-code the wage, then convert it with ppp.
       NOTE(review): ppp presumably holds purchasing-power parities
       relative to us04 - confirm the source of 0.86 and 0.65. */
    DATA &ccyy ;
     SET &ccyy ;
        iqr=&t-&b ;
        upper_bound=&t + (iqr * 3) ;
        lower_bound=&b - (iqr * 3) ;
        IF hourwage>exp(upper_bound) THEN hourwage=exp(upper_bound) ;
        IF hourwage<exp(lower_bound) THEN hourwage=exp(lower_bound) ;
        IF      dname = 'be04' THEN ppp=0.86 ;
        ELSE IF dname = 'gr04' THEN ppp=0.65 ;
        ELSE                        ppp=1 ;
        hourwage_ppp = hourwage/ppp ;
        logwage_ppp  = LOG(hourwage_ppp) ;
    RUN ;

    /* The first country creates the stacked file, the others extend it. */
    %IF &i = 1 %THEN
      %DO ;
        DATA current ;
         SET &ccyy ;
        RUN ;
      %END ;
    %ELSE
      %DO ;
        PROC APPEND BASE=current DATA=&ccyy FORCE ;
        RUN ;
      %END ;
  %END ;

  PROC SORT DATA=current ;
    BY sex ;
  RUN ;

  PROC SURVEYREG DATA=current ;
    BY     sex ;
    WEIGHT ppopwgt ;
    MODEL  logwage_ppp = age agesq mededuc hieduc immigr partner youngchild oldchild
                         ptime1 homeowner belgium greece ;
  RUN ;

%MEND pool ;
/* Run Exercise 8 on the three listed datasets. */
%LET all=us04 be04 gr04;
%pool